# CLEAN WORKSPACE AND LOAD PACKAGES --------------------------------------------

# Remove every (non-hidden) object from the current environment so the script
# starts from a clean slate.
# NOTE(review): this also wipes the workspace of anyone who source()s this file
# from an interactive session, and it neither detaches packages nor resets
# options; running the script in a fresh R session gives a cleaner guarantee.
rm(list = ls())
library(datasim)
library(tidyverse)

# SIMULATE MULTIVARIATE SPATIAL DATA -------------------------------------------

# set.seed(4)
# 3 x 3 correlation matrix: unit diagonal, -0.3 between components 1 and 2,
# 0.3 between components 2 and 3, and no correlation between 1 and 3.
Corr <- diag(3)
Corr[1, 2] <- Corr[2, 1] <- -0.3
Corr[2, 3] <- Corr[3, 2] <- 0.3

# Common standard deviation (variance 0.4) for the three components; scaling
# the correlation matrix on both sides by diag(sigmas) gives the covariance
# that is passed as `sigma` to mre() in the formula below.
sigmas <- rep(0.4^0.5, times = 3)
D <- diag(sigmas)
Cov <- D %*% Corr %*% D

# beta <- c(-0.5, 0, 0.5)
# Coefficients handed to mfe(); all zero, so x1 has no effect on the mean.
beta <- rep(0, times = 3)

# Arguments handed to mgp(): a diagonal 0.6 "variance" matrix, the name of the
# correlation model, and one list of correlation parameters per component.
variance <- 0.6 * diag(3)
cor.model <- "exp_cor"
cor.params <- lapply(c(0.04, 0.04, 0.1), function(phi) list(phi = phi))

# Formula for the `mean` parameter: an mfe() term on x1, an mre() term grouped
# by factor(id) and an mgp() term over the coordinate s1. The get("...") calls
# fetch the parameter objects defined above by name.
# NOTE(review): mfe/mre/mgp are presumably datasim's multivariate fixed-effect,
# random-effect and Gaussian-process terms -- confirm against the package docs.
mean_formula <- mean ~ mfe(x1, beta = get("beta")) +
  mre(factor(id), sigma = get("Cov")) +
  mgp(
    list(s1),
    variance = get("variance"),
    cor.model = get("cor.model"),
    cor.params = get("cor.params")
  )

# Formula for the `sd` parameter: fixed at zero.
sd_formula <- sd ~ I(0)

f <- list(mean_formula, sd_formula)

# Simulate n units with m responses each; the outer parentheses print the
# result when the statement is run at top level.
n <- 300
m <- 3
(data_geo <- sim_model(formula = f, n = n, responses = m))
## # A tibble: 900 x 9
##       id      x1     s1 mre.factor.mean mgp.list.mean   mean    sd
##    <int>   <dbl>  <dbl>           <dbl>         <dbl>  <dbl> <dbl>
##  1     1  1.17   0.848            1.19         -0.841  0.346    0.
##  2     2  0.396  0.619           -0.580        -0.629 -1.21     0.
##  3     3 -0.0194 0.532            0.381        -0.894 -0.514    0.
##  4     4  0.0486 0.512            0.288        -0.807 -0.519    0.
##  5     5  0.364  0.983            0.647         0.450  1.10     0.
##  6     6  0.762  0.757            1.20          0.367  1.56     0.
##  7     7 -0.166  0.0637           0.609         1.02   1.63     0.
##  8     8  0.971  0.187            0.393         1.95   2.34     0.
##  9     9 -0.643  0.417           -1.80          1.26  -0.542    0.
## 10    10  0.152  0.720            0.167         1.02   1.18     0.
## # ... with 890 more rows, and 2 more variables: response <dbl>,
## #   response_label <int>
# knitr::kable(head(data_model, 10))

# 10 x 2 matrix of standard-normal draws.
# NOTE(review): X is not referenced again in the active part of this script
# (only in the commented-out legacy section) -- confirm whether it is needed.
X <- matrix(rnorm(10 * 2), nrow = 10, ncol = 2)

# VISUALIZE MULTIVARIATE SPATIAL DATA ------------------------------------------

# Simulated response against the covariate x1, with points and a smoother
# coloured by response label (the colour mapping is shared by both layers).
ggplot(data_geo, aes(x1, response, col = factor(response_label))) +
  geom_smooth() +
  geom_point()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# mgp() contribution to the mean along the coordinate s1, one line per
# response label.
ggplot(data_geo, aes(s1, mgp.list.mean, col = factor(response_label))) +
  geom_line()

# Pairs plot of the mre() contribution: spread it into one column per response
# label, drop the id column, and hand the result to GGally.
mre_by_response <- spread(
  dplyr::select(data_geo, id, mre.factor.mean, response_label),
  response_label,
  mre.factor.mean
)
GGally::ggpairs(dplyr::select(mre_by_response, -id), aes(fill = "any"))

# Reshape the simulated data to wide format: the columns from mre.factor.mean
# through ability (the renamed response) are gathered, suffixed with their
# response label (e.g. ability1, ability2, ...) and spread back out, leaving
# one row per combination of the remaining columns.
# select() is namespace-qualified, like every other select() call on data in
# this script, so a select() exported by another attached package cannot be
# picked up by mistake.
data_geo_wide <- data_geo %>%
  dplyr::rename(ability = response, id_person = id) %>%
  gather(var, value, mre.factor.mean:ability) %>%
  mutate(var = paste0(var, response_label)) %>%
  dplyr::select(-response_label) %>%
  spread(var, value)


# SIMULATE ITEM FACTOR DATA ----------------------------------------------------

# Number of items.
q <- 10

# Stack q copies of the wide subject-level data (one copy per item); this is
# passed as `init_data` to sim_model() below.
init_data <- purrr::reduce(
  purrr::map(seq_len(q), function(i) data_geo_wide),
  rbind
)

# n <- 300
# Item difficulties as a 1 x q matrix, evenly spaced in steps of 0.2.
difficulty <- matrix(((seq_len(q) - 5) / 10) * 2, nrow = 1)

# Discrimination on factor 1: an even grid from 0.4 to 1.5, with item 1 set to
# 1 and items 3, 5 and 8 set to 0.
discrimination1 <- replace(
  seq(0.4, 1.5, length.out = q),
  c(1, 3, 5, 8),
  c(1, 0, 0, 0)
)

# Discrimination on factor 2: uniform draws on (0, 2), with items 1 and 2 set
# to 0 and 1 and items 4, 5 and 10 set to 0. The draw for factor 2 comes before
# the draw for factor 3, so the random-number stream is consumed in the same
# order as before.
discrimination2 <- replace(
  runif(q, min = 0, max = 2),
  c(1, 2, 4, 5, 10),
  c(0, 1, 0, 0, 0)
)
discrimination3 <- runif(q, min = 0, max = 2)
# discrimination3[1:3] <- c(0, 0, 1)
# discrimination1 <- discrimination1 * 0.3
# discrimination2 <- discrimination2 * 0.3
cbind(discrimination1, discrimination2, discrimination3)
##       discrimination1 discrimination2 discrimination3
##  [1,]       1.0000000       0.0000000       1.4056214
##  [2,]       0.5222222       1.0000000       1.7647846
##  [3,]       0.0000000       0.4043801       0.6430302
##  [4,]       0.7666667       0.0000000       1.7204534
##  [5,]       0.0000000       0.0000000       1.8861882
##  [6,]       1.0111111       1.3665095       1.2067074
##  [7,]       1.1333333       1.1069779       0.4768627
##  [8,]       0.0000000       1.6809371       0.5209572
##  [9,]       1.3777778       0.9677522       1.1553979
## [10,]       1.5000000       0.0000000       1.3248507
# Formula for the success probability: an mfa() term on `ones` carrying the
# item difficulties plus one mfe() term per latent ability carrying that
# factor's discriminations (the third factor is currently switched off).
prob_formula <- prob ~ mfa(ones, beta = get("difficulty")) +
  mfe(ability1, beta = get("discrimination1")) +
  mfe(ability2, beta = get("discrimination2"))
# + mfe(ability3, beta = get("discrimination3"))

# `size` is fixed at 1.
f <- list(prob_formula, size ~ I(1))

# Simulate the item responses with rbinom as generator, using pnorm as inverse
# link for `prob` and identity for `size`, then rename the id/label/response
# columns to subject/item/y.
data_long <- sim_model(
  formula = f,
  link_inv = list(pnorm, identity),
  generator = rbinom,
  responses = q,
  n = n,
  init_data = init_data
) %>%
  dplyr::rename(subject = id, item = response_label, y = response)

# VISUALIZE ITEM FACTOR DATA ---------------------------------------------------

# Per-subject summary: proportion of endorsed items together with the subject's
# latent abilities and covariate (taken as the unique value within subject).
explor <- dplyr::summarize(
  dplyr::group_by(data_long, subject),
  endorse = mean(y),
  ability1 = unique(ability1),
  ability2 = unique(ability2),
  # ability3 = unique(ability3),
  x1 = unique(x1)
)

# Endorsement rate against each latent ability.
ggplot(explor, aes(x = ability1, y = endorse)) +
  geom_point(alpha = 0.5)

ggplot(explor, aes(x = ability2, y = endorse)) +
  geom_point(alpha = 0.5)

# ggplot(explor, aes(ability3, endorse)) + geom_point(alpha = 0.5)
# ggplot(explor, aes(x1, endorse)) + geom_point(alpha = 0.5)

# PREPARE DATA -----------------------------------------------------------------

# Response vector and subject coordinates.
response <- data_long[["y"]]
coordinates <- data_geo_wide %>% dplyr::select(s1)

# Pairwise distance matrix between the coordinates. Note that the object is
# named `dist`, like the stats function used to compute it.
dist <- as.matrix(stats::dist(coordinates))
# dist <- as.matrix(dist(dplyr::select(data_geo_wide, s1)[order(data_geo_wide$s1),]))
# dist <- dist[order(data_geo_wide$s1),]

# Print the number of subjects and items.
n
## [1] 300
q
## [1] 10

# Number of latent factors and number of iterations.
m <- 2
# iter <- 5 * 10 ^ 4
iter <- 5e3

# Settings referenced by the commented-out ifa_gibbs_sp() call below; the
# active spmirt() call does not use them.
cor.params <- rep(0.04, times = 2)
sig.params <- rep(0.6^0.5, times = 2)
fix.sigma <- 0.4^0.5
# sigma_prop <- matrix(c(0.138, -0.023, -0.023, 0.1), 2) * 2.38 ^ 2 / 2
sigma_prop <- diag(0.001, 5)

# 0/1 lower-triangular pattern with the shape of the discrimination matrix
# (passed as the `A` constraint to spmirt()), and an m x m identity matrix.
disc_mat <- cbind(discrimination1, discrimination2)
L_a <- lower.tri(disc_mat, diag = TRUE)
storage.mode(L_a) <- "double"
T_gp <- diag(m)

# RUN --------------------------------------------------------------------------

# Compile/load the C++ sources and the R helper files used below. All paths are
# relative, so the working directory is assumed to sit next to `src/` and `R/`
# -- TODO confirm. The order is kept as is because later files may rely on, or
# override, definitions from earlier ones.
# NOTE(review): presumably ggplot-mcmc.R provides the gg_* plotting helpers and
# spmirt.R provides spmirt() / as_tibble.spmirt.list() -- verify.
Rcpp::sourceCpp("../src/mirt-gibss-sp.cpp")
source("../R/ggplot-mcmc.R")
Rcpp::sourceCpp("../src/ifa-driver.cpp")
source("../R/spmirt.R")

# # set.seed(5)
# system.time(
#   samples <- ifa_gibbs_sp(response, dist, n, q, m, cor.params, sig.params,
#                           Corr[1:2, 1:2], fix.sigma, sigma_prop, L_a, T_gp, 0.234,
#                           iter)
# )

# Constraints: the 0/1 pattern for the discrimination matrix A, no W, and the
# first two simulated standard deviations as V_sd.
constrains_opt <- list(A = L_a, W = NULL, V_sd = sigmas[1:2])

# Adaptive-sampling settings; NULL entries leave the choice to spmirt().
adaptive_opt <- list(
  Sigma = NULL,
  Sigma_R = NULL,
  Sigma_gp_sd = NULL,
  Sigma_gp_phi = NULL,
  scale = 1,
  C = 0.7,
  alpha = 0.8,
  accep_prob = 0.234
)

# Run the sampler with two factors and time it; the draws are stored in
# `samples`.
system.time(
  samples <- spmirt(
    response = response,
    predictors = NULL,
    coordinates = coordinates,
    nobs = n,
    nitems = q,
    nfactors = 2,
    niter = iter,
    thin = 1,
    constrains = constrains_opt,
    adaptive = adaptive_opt,
    sigmas_gp_opt = list(initial = 1, prior_mean = NULL, prior_sd = NULL),
    phi_gp_opt = list(initial = 0.08, prior_mean = 0.1, prior_sd = NULL)
  )
)
##    user  system elapsed 
## 458.101 367.841 244.175

# Print the recorded model settings, skipping the first three entries.
attr(samples, "model_info")[-(1:3)]
## $nobs
## [1] 300
## 
## $nitems
## [1] 10
## 
## $nfactors
## [1] 2
## 
## $niter
## [1] 5000
## 
## $thin
## [1] 1
## 
## $constrain_L
##       [,1] [,2]
##  [1,]    1    0
##  [2,]    1    1
##  [3,]    1    1
##  [4,]    1    1
##  [5,]    1    1
##  [6,]    1    1
##  [7,]    1    1
##  [8,]    1    1
##  [9,]    1    1
## [10,]    1    1
## 
## $constrain_T
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
## 
## $constrain_V_sd
## [1] 0.6324555 0.6324555
## 
## $adap_Sigma
##       [,1]  [,2]  [,3]  [,4]  [,5]
## [1,] 0.001 0.000 0.000 0.000 0.000
## [2,] 0.000 0.001 0.000 0.000 0.000
## [3,] 0.000 0.000 0.001 0.000 0.000
## [4,] 0.000 0.000 0.000 0.001 0.000
## [5,] 0.000 0.000 0.000 0.000 0.001
## 
## $adap_scale
## [1] 1
## 
## $adap_C
## [1] 0.7
## 
## $adap_alpha
## [1] 0.8
## 
## $adap_accep_prob
## [1] 0.234
## 
## $c_initial
##  [1]  0.53346191  0.53458595  1.83161263 -0.24242993  0.05979608
##  [6]  0.26796501 -0.14401172  0.92308038  1.29509789  0.97783649
## 
## $c_prior_mean
##  [1] 0 0 0 0 0 0 0 0 0 0
## 
## $c_prior_sd
##  [1] 1 1 1 1 1 1 1 1 1 1
## 
## $A_initial
##       [,1] [,2]
##  [1,]    1    0
##  [2,]    0    1
##  [3,]    0    0
##  [4,]    0    0
##  [5,]    0    0
##  [6,]    0    0
##  [7,]    0    0
##  [8,]    0    0
##  [9,]    0    0
## [10,]    0    0
## 
## $A_prior_mean
##       [,1] [,2]
##  [1,]    1    0
##  [2,]    0    1
##  [3,]    0    0
##  [4,]    0    0
##  [5,]    0    0
##  [6,]    0    0
##  [7,]    0    0
##  [8,]    0    0
##  [9,]    0    0
## [10,]    0    0
## 
## $A_prior_sd
##       [,1] [,2]
##  [1,] 0.45 1.00
##  [2,] 1.00 0.45
##  [3,] 1.00 1.00
##  [4,] 1.00 1.00
##  [5,] 1.00 1.00
##  [6,] 1.00 1.00
##  [7,] 1.00 1.00
##  [8,] 1.00 1.00
##  [9,] 1.00 1.00
## [10,] 1.00 1.00
## 
## $R_initial
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
## 
## $R_prior_eta
## [1] 1.5
## 
## $B_initial
##      [,1]
## [1,]   NA
## 
## $B_prior_mean
##      [,1]
## [1,]   NA
## 
## $B_prior_sd
##      [,1]
## [1,]   NA
## 
## $sigmas_gp_initial
## [1] 1 1
## 
## $sigmas_gp_mean
## [1] 0.6 0.6
## 
## $sigmas_gp_sd
## [1] 0.2 0.2
## 
## $phi_gp_initial
## [1] 0.08 0.08
## 
## $phi_gp_mean
## [1] 0.1 0.1
## 
## $phi_gp_sd
## [1] 0.2 0.2
## 
## $model_type
## [1] "spifa"
# Convert the draws to a tibble and print posterior quantiles.
# NOTE(review): the second argument (iter / 2) is presumably the burn-in --
# confirm against as_tibble.spmirt.list().
samples_tib <- samples %>% as_tibble.spmirt.list(iter / 2)
summary(samples_tib)
## # A tibble: 3,635 x 6
##    Parameters  `2.5%`   `10%`   `50%`  `90%` `97.5%`
##    <fct>        <dbl>   <dbl>   <dbl>  <dbl>   <dbl>
##  1 c[1]       -1.16   -1.00   -0.707  -0.357  -0.181
##  2 c[2]       -1.04   -0.897  -0.564  -0.307  -0.179
##  3 c[3]       -0.824  -0.703  -0.541  -0.366  -0.273
##  4 c[4]       -0.340  -0.240  -0.0434  0.205   0.359
##  5 c[5]       -0.0867 -0.0271  0.0809  0.192   0.245
##  6 c[6]       -0.230  -0.0857  0.247   0.567   0.802
##  7 c[7]       -0.249  -0.0413  0.351   0.803   1.13 
##  8 c[8]       -0.197   0.0555  0.479   0.965   1.21 
##  9 c[9]        0.517   0.667   0.942   1.28    1.54 
## 10 c[10]       0.759   0.886   1.23    1.71    1.94 
## # ... with 3,625 more rows

# Long (key/value) version of the same draws.
samples_long <- tidyr::gather(samples_tib)

# Trace plots for the "c" and "a" parameters.
# NOTE(review): the positional arguments after `samples` are presumably
# burn-in, thinning and the parameter selection -- confirm.
gg_trace(as_tibble.spmirt.list(samples, 0, 10, "c"), alpha = 0.6)

gg_trace(as_tibble.spmirt.list(samples, 0, 10, "a"), alpha = 0.6)

# Ridge-density plot for the "a" parameters, starting at iter / 2.
gg_density(
  as_tibble.spmirt.list(samples, iter / 2, 10, "a"),
  alpha = 0.5, ridges = TRUE, aes(fill = Parameters), scale = 4
)
## Picking joint bandwidth of 0.089

# Ridge-density plot for the first 100 "theta" columns.
gg_density(
  dplyr::select(as_tibble.spmirt.list(samples, iter / 2, 10, "theta"), 1:100),
  alpha = 0.5, ridges = TRUE, aes(fill = Parameters), scale = 4
)
## Picking joint bandwidth of 0.133

# Trace plots for the first 10 "theta" columns. select() is
# namespace-qualified, like the other column selections in this script, so a
# select() exported by another attached package cannot be picked up by mistake.
as_tibble.spmirt.list(samples, 0, 10, "theta") %>%
  dplyr::select(1:10) %>%
  gg_trace(alpha = 0.6)

# Trace plots for the "corr", "mgp_sd" and "mgp_phi" selections.
gg_trace(as_tibble.spmirt.list(samples, 0, 1, "corr"), alpha = 0.6)

gg_trace(as_tibble.spmirt.list(samples, 0, 1, "mgp_sd"), alpha = 0.6)

gg_trace(as_tibble.spmirt.list(samples, 0, 1, "mgp_phi"), alpha = 0.6)

# Joint density of the two discrimination columns, highlighting the simulated
# discrimination values.
gg_density2d(
  as_tibble.spmirt.list(samples, 0, 10, "a"),
  `Discrimination 1`, `Discrimination 2`,
  each = 10,
  keys = c("Item ", "Discrimination "),
  highlight = c(discrimination1, discrimination2)
)
## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive

# Same draws shown as a scatter plot.
gg_scatter(
  as_tibble.spmirt.list(samples, 0, 10, "a"),
  `Discrimination 1`, `Discrimination 2`,
  each = 10,
  keys = c("Item ", "Discrimination "),
  highlight = c(discrimination1, discrimination2)
)

# Interval plot for the "a" parameters with the simulated discriminations
# overlaid as points.
a_summary <- summary(as_tibble.spmirt.list(samples, iter / 2, select = "a"))
a_summary <- mutate(a_summary, param = c(discrimination1, discrimination2))
gg_errorbarh(a_summary) +
  geom_point(aes(param, Parameters), col = 3)

# Interval plot for the "c" parameters with the simulated difficulties
# overlaid as points.
c_summary <- summary(as_tibble.spmirt.list(samples, iter / 2, select = "c"))
c_summary <- mutate(c_summary, param = as.numeric(difficulty))
gg_errorbarh(c_summary) +
  geom_point(aes(param, Parameters), col = 3)

# Interval plots for the latent abilities, with the simulated abilities
# overlaid as points. The first n "theta" columns are compared with the first n
# simulated responses and the next n columns with the next n responses.
# The index ranges are derived from n (the value passed to the sampler as
# `nobs`) instead of being hard-coded as 1:300 / 301:600, so the plots stay
# correct if the number of subjects changes.
as_tibble.spmirt.list(samples, iter/2, select = "theta") %>%
  dplyr::select(seq_len(n)) %>%
  summary() %>%
  mutate(param = data_geo$response[seq_len(n)]) %>%
  gg_errorbarh(sorted = TRUE) +
  geom_point(aes(x = param), col = 3)

as_tibble.spmirt.list(samples, iter/2, select = "theta") %>%
  dplyr::select(seq(n + 1, 2 * n)) %>%
  summary() %>%
  mutate(param = data_geo$response[seq(n + 1, 2 * n)]) %>%
  gg_errorbarh(sorted = TRUE) +
  geom_point(aes(x = param), col = 3)

# Posterior summary of the first n "theta" columns joined with the simulated
# ability and the coordinate of the first n rows of data_geo. The index range
# is derived from n (the value passed to the sampler as `nobs`) instead of the
# hard-coded 1:300, so it stays correct if the number of subjects changes.
# NOTE(review): s2 duplicates s1 to provide a second coordinate for the
# variogram below; with both coordinates equal, Euclidean distances are
# sqrt(2) times the distance along s1 -- confirm this scaling is intended.
ability1_pred <- as_tibble.spmirt.list(samples, iter/2, select = "theta") %>%
  dplyr::select(seq_len(n)) %>%
  summary() %>%
  mutate(param = data_geo$response[seq_len(n)],
         s1 = data_geo$s1[seq_len(n)],
         s2 = s1,
         estim = `50%`)

# Posterior median (black) against the simulated ability ("real") along s1.
ability1_pred %>%
  ggplot(aes(s1, `50%`)) +
  geom_line() +
  geom_line(aes(s1, param, col = "real"))

# Empirical variogram of the posterior medians.
vg <- gstat::variogram(estim ~ 1, ~ s1 + s2, ability1_pred, cutoff = 1, width = 0.01)
ggplot(vg, aes(dist, gamma)) +
  geom_point(aes(size = np)) +
  geom_smooth() +
  expand_limits(y = 0, x = 0) +
  scale_x_continuous(limits = c(0, 0.7))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Warning: Removed 30 rows containing missing values (geom_point).

# Posterior summary of "theta" columns n + 1 to 2 * n joined with the simulated
# ability and the coordinate of the matching rows of data_geo. The index range
# is derived from n (the value passed to the sampler as `nobs`) instead of the
# hard-coded 301:600, so it stays correct if the number of subjects changes.
# NOTE(review): s2 duplicates s1 to provide a second coordinate for the
# variogram below; with both coordinates equal, Euclidean distances are
# sqrt(2) times the distance along s1 -- confirm this scaling is intended.
ability2_pred <- as_tibble.spmirt.list(samples, iter/2, select = "theta") %>%
  dplyr::select(seq(n + 1, 2 * n)) %>%
  summary() %>%
  mutate(param = data_geo$response[seq(n + 1, 2 * n)],
         s1 = data_geo$s1[seq(n + 1, 2 * n)],
         s2 = s1,
         estim = `50%`)

# Posterior median (black) against the simulated ability ("real") along s1.
ability2_pred %>%
  ggplot(aes(s1, `50%`)) +
  geom_line() +
  geom_line(aes(s1, param, col = "real"))

# Empirical variogram of the posterior medians.
vg <- gstat::variogram(estim ~ 1, ~ s1 + s2, ability2_pred, cutoff = 1, width = 0.005)
ggplot(vg, aes(dist, gamma)) +
  geom_point(aes(size = np)) +
  geom_smooth() +
  expand_limits(y = 0, x = 0) +
  scale_x_continuous(limits = c(0, 0.7))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 60 rows containing non-finite values (stat_smooth).
## Warning: Removed 60 rows containing missing values (geom_point).

# # # PREPARE DATA FOR MODELLING ---------------------------------------------------
# #
# # Y <- data_model %>% dplyr::select(id, response, response_label) %>%
# #   spread(response_label, response) %>%
# #   arrange(id) %>%
# #   dplyr::select(-id) %>%
# #   as.matrix()
# #
# # X <- data_model %>% dplyr::select(id, matches("^x[[:digit:]]+$")) %>%
# #   unique() %>%
# #   arrange(id) %>%
# #   dplyr::select(-id) %>%
# #   as.matrix()
# #
# # Beta <- matrix(beta, nrow = 1)
# # Sigma_proposal <- diag(1, 3)
# #
# # # RUN MODEL --------------------------------------------------------------------
# #
# # getwd()
# # Rcpp::sourceCpp("../src/multi-lm.cpp")
# # source("../R/ggplot-mcmc.R")
# #
# # iter <- 10^6
# # system.time(
# #   samples <- multi_lm(Y, X, iter, 0.01 * Sigma_proposal, 0.001 * Sigma_proposal)
# # )
# # samples %>% map(~ tail(.))
# #
# # # apply(samples$beta, 2, mean)
# # # cor(samples$beta)
# #
# # # Visualize traces
# # as_tibble(samples, 0, 100, select = "beta") %>%
# #   gg_trace(wrap = TRUE, alpha = 0.6)
# #
# # as_tibble(samples, 0, 100, select = "beta") %>% gg_trace(alpha = 0.6)
# # as_tibble(samples, 0, 100, select = "corr_chol") %>% gg_trace(alpha = 0.6)
# # as_tibble(samples, 0, 100, select = "corr") %>% gg_trace(alpha = 0.6)
# # as_tibble(samples, 0, 100, select = "sigmas") %>% gg_trace(alpha = 0.6)
# #
# # bla <- as_tibble(samples, iter/2, select = "sigmas")
# # cov(log(bla))
# # nrow(unique(bla)) / nrow(bla)
# #
# # bla <- as_tibble(samples, iter/2, select = "corr_chol")
# # cov(bla)
# # nrow(unique(bla)) / nrow(bla)
# #
# # # Visualize densities
# #
# # as_tibble(samples, iter / 2, select = "corr_chol") %>%
# #   gg_density(aes(fill = Parameters), scale = 2, alpha = 0.5, ridges = TRUE)
# #
# # as_tibble(samples, iter / 2, select = "corr") %>%
# #   gg_density(aes(fill = Parameters), scale = 1, alpha = 0.5, ridges = TRUE)
# #
# # # Visualize credible intervals
# # as_tibble(samples, iter / 2, select = "beta") %>%
# #   summary() %>%
# #   mutate(param = beta) %>%
# #   gg_errorbarh() +
# #   geom_point(aes(param, Parameters), col = 3)
# #
# # Corr_chol <- t(chol(Corr))
# # corr_chol <- Corr_chol[lower.tri(Corr_chol, diag = TRUE)]
# # corr <- Corr[lower.tri(Corr)]
# #
# # as_tibble(samples, iter / 2, select = "corr_chol") %>%
# #   summary() %>%
# #   mutate(param = corr_chol) %>%
# #   gg_errorbarh() +
# #   geom_point(aes(param, Parameters), col = 3)
# #
# # as_tibble(samples, iter / 2, select = "corr") %>%
# #   summary() %>%
# #   mutate(param = corr) %>%
# #   gg_errorbarh() +
# #   geom_point(aes(param, Parameters), col = 3)
# #
# #
# # as_tibble(samples, iter / 2 ,select = "sigmas") %>%
# #   summary() %>%
# #   mutate(param = sigmas) %>%
# #   gg_errorbarh() +
# #   geom_point(aes(param, Parameters), col = 3)
# #
# #
# # # Visualize credible intervals for all Parameters
# # as_tibble(samples, iter / 2) %>%
# #   summary() %>%
# #   mutate(param = c(beta, corr_chol, corr, sigmas)) %>%
# #   gg_errorbar() +
# #   geom_point(aes(Parameters, param), col = 3)
# #